* Load the input dataset from the $data_out directory.
use "$data_out/naive.dta", clear

* Placeholder column; it is dropped and regenerated further down once the
* regression residuals are available.
generate propensity = .

* Monthly date and day-of-week (0 = Sunday) derived from the encounter date.
generate month = mofd(encdate)

generate dow = dow(encdate)

* Group identifiers used as absorbed fixed effects in the regression:
* one per month-clinic cell and one per clinic-weekday cell.
egen hym = group(month clinic)
egen hd  = group(clinic dow)

* Diagnosis group built from the first three characters of dx1.
generate temp_dx3 = substr(dx1, 1, 3)
egen diag = group(temp_dx3)



* Restrict the data to the regression sample: complete covariates, and no
* observation that is alone in one of the absorbed fixed-effect groups.
* NOTE(review): the original filter names junior while the regression uses
* junior_enlisted. Both are checked here so the regressor is covered even if
* junior is a separate variable rather than an abbreviation -- confirm.
keep if !mi(age_bin) & !mi(mos) & !mi(longevity) & !mi(married) & !mi(junior) & !mi(junior_enlisted) & !mi(college) & !mi(race_white) & !mi(female) & !mi(diag) & !mi(afqt_p)
gen include = 1

* Removing a singleton from one fixed effect can leave a new singleton in
* another, so sweep all four fixed effects repeatedly until a complete sweep
* drops nothing. A single pass leaves observations that reghdfe would still
* discard, which then carry missing residuals.
tempvar single
local n_dropped = 1
while `n_dropped' > 0 {
    local n_dropped = 0
    foreach fe in diag mos hym hd {
        bysort `fe': gen byte `single' = (_N == 1)
        quietly count if `single'
        local n_dropped = `n_dropped' + r(N)
        quietly drop if `single'
        drop `single'
    }
}









* Regress ed_prescription on the covariates while absorbing the hym, hd, diag
* and mos fixed effects; the residuals are stored in the variable residual.
reghdfe ed_prescription i.age_bin race_white female junior_enlisted college married longevity afqt_p, absorb(hym hd diag mos) res(residual)

* Mean residual within each provider-year (egen mean() ignores missing values).
egen mean_residual = mean(residual), by(provID year)

* Leave-one-out mean: subtract the observation's own residual from the
* provider-year sum and divide by the number of remaining residuals.
* The count must cover only non-missing residuals so that mean * count equals
* the sum that mean_residual was built from; counting every row (_N) would
* overstate the sum whenever reghdfe left an observation out.
tempvar n_resid
egen `n_resid' = count(residual), by(provID year)
cap drop propensity
* A provider-year with a single residual divides by zero, which Stata
* evaluates to missing; rows with a missing residual also get missing.
gen propensity = (mean_residual * `n_resid' - residual) / (`n_resid' - 1)
drop `n_resid'
sort provID year


* Classify each provider-year by the within-year rank of its mean residual,
* attach the classification to every observation, and save the result.
preserve
* One row per provider-year.
bysort provID year: keep if _n == 1

* Provider-years without any residual cannot be ranked. Left in, they would
* sort last (missing is the largest value) and be labelled top quartile;
* dropped here, they receive missing above_median/quartile after the merge.
drop if mi(mean_residual)

* Rank within year by mean residual; provID breaks ties so the ordering is
* the same on every run.
bysort year (mean_residual provID): gen n = _n
by year: gen N = _N

gen above_median = n > N/2
* Rank-based quartiles: 1 for n <= N/4, 2 for N/4 < n <= N/2,
* 3 for N/2 < n <= 3N/4, 4 above that.
gen quartile = ceil(4 * n / N)
keep provID year above_median quartile

tempfile ranks
save `ranks'
restore

* Many observations per provider-year in memory, one row each in the ranks file.
merge m:1 provID year using `ranks', nogen

save "$data_out/provider_propensity", replace
